#Warning: the bootstrap takes a few hours to run
rm(list = ls())
gc()

library(lubridate)
library(tidyverse)
library(foreign)
library(ggplot2)
library(ggthemes)
library(estimatr)
library(mice)
library(ggpubr)

df_long = readRDS("data/baseline_clean_mi.rds")

df = as.mids(df_long)

indices = df$data %>% 
  select(c(response_num, names(df$data)[str_detect(names(df$data), "^index")]))

covars = c("syr_origin", "syr_origin_urban", "location_its", "sick", "head_educ2", "toddler", "elderly", "female_headed_hh", "location_district", "hezb_control")
covars2 = c("syr_origin_urban", "location_its", "sick", "head_educ2", "toddler", "elderly", "female_headed_hh", "hezb_control")

source("functions/plot_it.R")
source("functions/effect_plotter3.R")

predictors = names(indices)[2:length(indices)]
predictors_by_info = predictors[!str_detect(predictors, "info")]
predictors_by_info = predictors_by_info[str_detect(predictors_by_info, "syria|regime")]


#NOTE: Uncomment the below if you want to run the bootstrap. Currently it only loads the results starting on line 116
# bootstrap ---------------------------------------------------------------
# n = nrow(df_long %>% filter(.imp == 1))
# boots = 1000
# 
# set.seed(2020)
# 
# t = Sys.time()
# for(j in 1:10){ #j is imputation number
# 
#   d = df_long %>% filter(.imp == j)
# 
#   assign(paste0("betas12_", j), matrix(ncol = length(predictors_by_info) * 2, nrow = boots) %>% as.data.frame()) #betas is the matrix of coefs for each imputation
#   assign(paste0("betasprep_", j), matrix(ncol = length(predictors_by_info) * 2, nrow = boots) %>% as.data.frame()) #betas is the matrix of coefs for each imputation
# 
#   for(k in 1:1000){ #k is the boot number
#     sampled_rows = sample(1:nrow(d), n, replace = T)
#     df_boot = d[sampled_rows,]
# 
#     for(i in 1:length(predictors_by_info)){ #i is regression number. These are the 12 month return regressions
#       assign(paste0("reg_info12_", i), lm_robust(as.formula(paste("rtrn_head_12", paste(c(paste0(predictors_by_info[i], " * index_info_quality"), covars), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights, data = df_boot))
#     }
# 
#     for(i in 1:length(predictors_by_info)){ #i is regression number. These are the preparation to return regressions
#       assign(paste0("reg_infoprep_", i), lm_robust(as.formula(paste("prep_pca", paste(c(paste0(predictors_by_info[i], " * index_info_quality"), covars), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights, data = df_boot))
#     }
# 
#     #Return 12 months coefficients
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 1]", "<-", reg_info12_1$coefficients["index_safety_syria"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 2]", "<-", reg_info12_1$coefficients["index_safety_syria:index_info_quality"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 3]", "<-", reg_info12_2$coefficients["index_regime_control"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 4]", "<-", reg_info12_2$coefficients["index_regime_control:index_info_quality"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 5]", "<-", reg_info12_3$coefficients["index_econ_syria"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 6]", "<-", reg_info12_3$coefficients["index_econ_syria:index_info_quality"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 7]", "<-", reg_info12_4$coefficients["index_services_syria"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 8]", "<-", reg_info12_4$coefficients["index_services_syria:index_info_quality"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 9]", "<-", reg_info12_5$coefficients["index_networks_syria"])))
#     eval(parse(text = paste0("betas12_", j, "[", k, ", 10]", "<-", reg_info12_5$coefficients["index_networks_syria:index_info_quality"])))
# 
#     #Prepare to return coefficients
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 1]", "<-", reg_infoprep_1$coefficients["index_safety_syria"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 2]", "<-", reg_infoprep_1$coefficients["index_safety_syria:index_info_quality"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 3]", "<-", reg_infoprep_2$coefficients["index_regime_control"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 4]", "<-", reg_infoprep_2$coefficients["index_regime_control:index_info_quality"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 5]", "<-", reg_infoprep_3$coefficients["index_econ_syria"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 6]", "<-", reg_infoprep_3$coefficients["index_econ_syria:index_info_quality"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 7]", "<-", reg_infoprep_4$coefficients["index_services_syria"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 8]", "<-", reg_infoprep_4$coefficients["index_services_syria:index_info_quality"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 9]", "<-", reg_infoprep_5$coefficients["index_networks_syria"])))
#     eval(parse(text = paste0("betasprep_", j, "[", k, ", 10]", "<-", reg_infoprep_5$coefficients["index_networks_syria:index_info_quality"])))
# 
#     print(paste(j, k))
#   }
#   print(j)
# }
# 
# Sys.time() - t
# 
# betas12_1 %>% saveRDS("out/info_boot/betas12_1.rds")
# betas12_2 %>% saveRDS("out/info_boot/betas12_2.rds")
# betas12_3 %>% saveRDS("out/info_boot/betas12_3.rds")
# betas12_4 %>% saveRDS("out/info_boot/betas12_4.rds")
# betas12_5 %>% saveRDS("out/info_boot/betas12_5.rds")
# betas12_6 %>% saveRDS("out/info_boot/betas12_6.rds")
# betas12_7 %>% saveRDS("out/info_boot/betas12_7.rds")
# betas12_8 %>% saveRDS("out/info_boot/betas12_8.rds")
# betas12_9 %>% saveRDS("out/info_boot/betas12_9.rds")
# betas12_10 %>% saveRDS("out/info_boot/betas12_10.rds")
# 
# betasprep_1 %>% saveRDS("out/info_boot/betasprep_1.rds")
# betasprep_2 %>% saveRDS("out/info_boot/betasprep_2.rds")
# betasprep_3 %>% saveRDS("out/info_boot/betasprep_3.rds")
# betasprep_4 %>% saveRDS("out/info_boot/betasprep_4.rds")
# betasprep_5 %>% saveRDS("out/info_boot/betasprep_5.rds")
# betasprep_6 %>% saveRDS("out/info_boot/betasprep_6.rds")
# betasprep_7 %>% saveRDS("out/info_boot/betasprep_7.rds")
# betasprep_8 %>% saveRDS("out/info_boot/betasprep_8.rds")
# betasprep_9 %>% saveRDS("out/info_boot/betasprep_9.rds")
# betasprep_10 %>% saveRDS("out/info_boot/betasprep_10.rds")



# Load bootstrap results --------------------------------------------------
rm(list = ls())
betas12_1 = readRDS("out/info_boot/betas12_1.rds")
betas12_2 = readRDS("out/info_boot/betas12_2.rds")
betas12_3 = readRDS("out/info_boot/betas12_3.rds")
betas12_4 = readRDS("out/info_boot/betas12_4.rds")
betas12_5 = readRDS("out/info_boot/betas12_5.rds")
betas12_6 = readRDS("out/info_boot/betas12_6.rds")
betas12_7 = readRDS("out/info_boot/betas12_7.rds")
betas12_8 = readRDS("out/info_boot/betas12_8.rds")
betas12_9 = readRDS("out/info_boot/betas12_9.rds")
betas12_10 = readRDS("out/info_boot/betas12_10.rds")

betasprep_1 = readRDS("out/info_boot/betasprep_1.rds")
betasprep_2 = readRDS("out/info_boot/betasprep_2.rds")
betasprep_3 = readRDS("out/info_boot/betasprep_3.rds")
betasprep_4 = readRDS("out/info_boot/betasprep_4.rds")
betasprep_5 = readRDS("out/info_boot/betasprep_5.rds")
betasprep_6 = readRDS("out/info_boot/betasprep_6.rds")
betasprep_7 = readRDS("out/info_boot/betasprep_7.rds")
betasprep_8 = readRDS("out/info_boot/betasprep_8.rds")
betasprep_9 = readRDS("out/info_boot/betasprep_9.rds")
betasprep_10 = readRDS("out/info_boot/betasprep_10.rds")

# Finding betas -----------------------------------------------------------
betas12 = NULL
betasprep = NULL

betas12 = betas12_1
betasprep = betasprep_1

for(i in 2:10){
  betas12 = betas12 %>% bind_rows(eval(parse(text = paste0("betas12_", i))))
  betasprep = betasprep %>% bind_rows(eval(parse(text = paste0("betasprep_", i))))
}


names(betas12) = c("safety", "safety_i", "control", "control_i", "econ", "econ_i", "services", "services_i", "networks", "networks_i")
names(betasprep) = c("safety", "safety_i", "control", "control_i", "econ", "econ_i", "services", "services_i", "networks", "networks_i")

betas12$safety_info = betas12$safety + betas12$safety_i
betas12$control_info = betas12$control + betas12$control_i
betas12$econ_info = betas12$econ + betas12$econ_i
betas12$services_info = betas12$services + betas12$services_i
betas12$networks_info = betas12$networks + betas12$networks_i

betasprep$safety_info = betasprep$safety + betasprep$safety_i
betasprep$control_info = betasprep$control + betasprep$control_i
betasprep$econ_info = betasprep$econ + betasprep$econ_i
betasprep$services_info = betasprep$services + betasprep$services_i
betasprep$networks_info = betasprep$networks + betasprep$networks_i


lows90_12 = apply(betas12, 2 , quantile, probs = .05)
lows95_12 = apply(betas12, 2 , quantile, probs = .025)
lows90_prep = apply(betasprep, 2 , quantile, probs = .05)
lows95_prep = apply(betasprep, 2 , quantile, probs = .025)

highs90_12 = apply(betas12, 2 , quantile, probs = .95)
highs95_12 = apply(betas12, 2 , quantile, probs = .975)
highs90_prep = apply(betasprep, 2 , quantile, probs = .95)
highs95_prep = apply(betasprep, 2 , quantile, probs = .975)

vars_12 = matrix(nrow = 10, ncol = 15)
vars_prep = matrix(nrow = 10, ncol = 15)

means_12 = matrix(nrow = 10, ncol = 15)
means_prep = matrix(nrow = 10, ncol = 15)

for(j in 1:15){ #variables
  for(i in 1:10){ #imputations
    means_12[i, j] = mean(betas12[((i-1)*1000+1):(i*1000), j])
    means_prep[i, j] = mean(betasprep[((i-1)*1000+1):(i*1000), j])
    
    vars_12[i, j] = var(betas12[((i-1)*1000+1):(i*1000), j])
    vars_prep[i, j] = var(betasprep[((i-1)*1000+1):(i*1000), j])
  }
}


means_12_fin = NULL
means_prep_fin = NULL

sds_12_fin = NULL
sds_prep_fin = NULL

for(i in 1:15){
  means_12_fin[i] = pool.scalar(Q = means_12[,i], vars_12[,i], n = 3003, k = 1)$qbar
  means_prep_fin[i] = pool.scalar(Q = means_prep[,i], vars_prep[,i], n = 3003, k = 1)$qbar
  
  sds_12_fin[i] = pool.scalar(Q = means_12[,i], vars_12[,i], n = 3003, k = 1)$t %>% sqrt
  sds_prep_fin[i] = pool.scalar(Q = means_prep[,i], vars_prep[,i], n = 3003, k = 1)$t %>% sqrt
}



# creating tibble ---------------------------------------------------------
df_plot = tibble(term = rep(c("safety", "control", "econ", "services", "networks", 
                    "safety_i", "control_i", "econ_i", "services_i", "networks_i", 
                    "safety_info", "control_info", "econ_info", "services_info", "networks_info"), 2),
       outcome = c(rep("Return in 12 months", 15), rep("Prepare to return", 15)),
       
       estimates = c(means_12_fin, means_prep_fin),
       
       sds = c(sds_12_fin, sds_prep_fin),
       
       lows90 = c(lows90_12, lows90_prep),
       highs90 = c(highs90_12, highs90_prep),
       
       lows95 = c(lows95_12, lows95_prep),
       highs95 = c(highs95_12, highs95_prep)
       )

df_plot$lows90_2 = df_plot$estimates - (df_plot$sds) * 1.64
df_plot$lows95_2 = df_plot$estimates - (df_plot$sds) * 1.96

df_plot$highs90_2 = df_plot$estimates + (df_plot$sds) * 1.64
df_plot$highs95_2 = df_plot$estimates + (df_plot$sds) * 1.96

df_plot = df_plot %>% filter(!str_detect(term, "_i$"))

df_plot$`Information Confidence` = c(rep("Low", 5), rep("High", 5), rep("Low", 5), rep("High", 5))
df_plot$term = rep(c("Safety", "Regime control", "Economic well-being", "Services", "Networks"), 4)
df_plot$term = factor(df_plot$term, levels = c("Networks",  "Services", "Economic well-being", "Regime control", "Safety"))

df_plot$outcome = factor(df_plot$outcome, levels = c("Return in 12 months", "Prepare to return"))
df_plot %>% write.csv("out/info_boot/info_boot1_figure.csv")
df_plot %>% 
  ggplot(aes(x = term, y = estimates, colour = `Information Confidence`, group = `Information Confidence`)) + 
  geom_pointrange(aes(ymin = lows95, ymax = highs95), position = position_dodge(width = .5), alpha = .5) +
  geom_pointrange(aes(ymin = lows90, ymax = highs90), position = position_dodge(width = .5)) +
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() + 
  facet_wrap(~outcome, scales = "free_x") + 
  theme_economist() + 
  scale_color_economist() + 
  labs(x = "", y = "") + 
  theme(legend.position = "bottom", legend.title = element_text(size = 13))
ggsave("figures/appendix/info_boot1.pdf", height = 6, width = 8)


df_plot %>% 
  ggplot(aes(x = term, y = estimates, colour = `Information Confidence`, group = `Information Confidence`)) + 
  geom_pointrange(aes(ymin = lows95_2, ymax = highs95_2), position = position_dodge(width = .5), alpha = .5) +
  geom_pointrange(aes(ymin = lows90_2, ymax = highs90_2), position = position_dodge(width = .5)) +
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() + 
  facet_wrap(~outcome, scales = "free_x") + 
  theme_economist() + 
  scale_color_economist() + 
  labs(x = "", y = "") + 
  theme(legend.position = "bottom", legend.title = element_text(size = 13))
ggsave("figures/appendix/info_boot2.pdf", height = 6, width = 8)

